Incremental fixes to plx stuff.
Signed-off-by: andrew.warfield@cl.cam.ac.uk
42090340_mvZtozMjghPJO0qsjk4NQ tools/blktap/blkint.h
42090340rc2q1wmlGn6HtiJAkqhtNQ tools/blktap/blktaplib.c
42090340C-WkRPT7N3t-8Lzehzogdw tools/blktap/blktaplib.h
+423f270cAbkh2f-DHtT0hmCtFFXVXg tools/blktap/blockstore-tls.c
42277b02WrfP1meTDPv1M5swFq8oHQ tools/blktap/blockstore.c
42277b02P1C0FYj3gqwTZUD8sxKCug tools/blktap/blockstore.h
42090340B3mDvcxvd9ehDHUkg46hvw tools/blktap/libgnbd/Makefile
42090340ZWkc5Xhf9lpQmDON8HJXww tools/blktap/libgnbd/gnbdtest.c
42090340ocMiUScJE3OpY7QNunvSbg tools/blktap/libgnbd/libgnbd.c
42090340G5_F_EeVnPORKB0pTMGGhA tools/blktap/libgnbd/libgnbd.h
+423f270cbEKiTMapKnCyqkuwGvgOMA tools/blktap/parallax-threaded.c
+423f270cFdXryIcD7HTPUl_Dbk4DAQ tools/blktap/parallax-threaded.h
42277b03930x2TJT3PZlw6o0GERXpw tools/blktap/parallax.c
42277b03XQYq8bujXSz7JAZ8N7j_pA tools/blktap/radix.c
42277b03vZ4-jno_mgKmAcCW3ycRAg tools/blktap/radix.h
42277b04xB_iUmiSm6nKcy8OV8bckA tools/blktap/vdi_fill.c
42277b045CJGD_rKH-ZT_-0X4knhWA tools/blktap/vdi_list.c
42277b043ZKx0NJSbcgptQctQ5rerg tools/blktap/vdi_snap.c
+423f270c_QDjGLQ_YdaOtyBM5n9BDg tools/blktap/vdi_snap_delete.c
42277b043Fjy5-H7LyBtUPyDlZFo6A tools/blktap/vdi_snap_list.c
42277b04vhqD6Lq3WmGbaESoAAKdhw tools/blktap/vdi_tree.c
42277b047H8fTVyUf75BWAjh6Zpsqg tools/blktap/vdi_validate.c
PLX_SRCS :=
PLX_SRCS += vdi.c
PLX_SRCS += radix.c
-PLX_SRCS += blockstore.c
PLX_SRCS += snaplog.c
+PLXT_SRCS := $(PLX_SRCS)
+#PLXT_SRCS += blockstore-tls.c
+PLXT_SRCS += blockstore.c
+PLXT_SRCS += parallax-threaded.c
+PLX_SRCS += blockstore.c
VDI_SRCS := $(PLX_SRCS)
PLX_SRCS += parallax.c
VDI_TOOLS += vdi_list
VDI_TOOLS += vdi_snap
VDI_TOOLS += vdi_snap_list
+VDI_TOOLS += vdi_snap_delete
VDI_TOOLS += vdi_fill
VDI_TOOLS += vdi_tree
VDI_TOOLS += vdi_validate
libblktap.so.$(MAJOR):
ln -sf libblktap.so.$(MAJOR).$(MINOR) $@
libblktap.so.$(MAJOR).$(MINOR): $(OBJS)
- $(CC) -Wl,-soname -Wl,$(SONAME) -shared -o $@ $^ -L../libxutil -lxutil -lz
+ $(CC) -Wl,-soname -Wl,$(SONAME) -shared -o $@ $^ -lpthread -L../libxutil -lxutil -lz
blkdump: $(LIB)
$(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkdump.c
parallax: $(LIB) $(PLX_SRCS)
$(CC) $(CFLAGS) -o parallax -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap $(PLX_SRCS) libgnbd/libgnbd.a
+parallax-threaded: $(LIB) $(PLXT_SRCS)
+ $(CC) $(CFLAGS) -o parallax-threaded -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lpthread -lblktap $(PLXT_SRCS) libgnbd/libgnbd.a
+
vdi_test: $(LIB) $(VDI_SRCS)
$(CC) $(CFLAGS) -g3 -o vdi_test -DVDI_STANDALONE $(VDI_SRCS)
vdi_snap_list: $(LIB) vdi_snap_list.c $(VDI_SRCS)
$(CC) $(CFLAGS) -g3 -o vdi_snap_list vdi_snap_list.c $(VDI_SRCS)
+vdi_snap_delete: $(LIB) vdi_snap_delete.c $(VDI_SRCS)
+ $(CC) $(CFLAGS) -g3 -o vdi_snap_delete vdi_snap_delete.c $(VDI_SRCS)
+
vdi_tree: $(LIB) vdi_tree.c $(VDI_SRCS)
$(CC) $(CFLAGS) -g3 -o vdi_tree vdi_tree.c $(VDI_SRCS)
$(CC) $(CFLAGS) -g3 -o vdi_validate vdi_validate.c $(VDI_SRCS)
-rdx_cmp: $(LIB) rdx_cmp.c $(VDI_SRCS)
- $(CC) $(CFLAGS) -g3 -o rdx_cmp rdx_cmp.c $(VDI_SRCS)
-
-
.PHONY: TAGS clean install mk-symlinks rpm
TAGS:
etags -t $(SRCS) *.h
-include $(DEPS)
+
+#Random testing targets. To be removed eventually.
+
+rdx_cmp: $(LIB) rdx_cmp.c $(VDI_SRCS)
+ $(CC) $(CFLAGS) -g3 -o rdx_cmp rdx_cmp.c $(VDI_SRCS)
+
+bb-tls: $(LIB) blockstore-benchmark.c
+ $(CC) $(CFLAGS) -o bb-tls blockstore-benchmark.c blockstore-tls.c -lpthread
+
+bb-trans: $(LIB) blockstore-benchmark.c
+ $(CC) $(CFLAGS) -o bb-trans blockstore-benchmark.c blockstore.c -lpthread
+
+radix-test: $(LIB) radix.c blockstore-threaded-trans.c
+ $(CC) $(CFLAGS) -g3 -D RADIX_STANDALONE -o radix-test radix.c blockstore-threaded-trans.c
*
* userspace interface routines for the blktap driver.
*
+ * (threadsafe(r) version)
+ *
* (c) 2004 Andrew Warfield.
*/
#include <sys/ioctl.h>
#include <string.h>
#include <unistd.h>
+#include <pthread.h>
+
#define __COMPILING_BLKTAP_LIB
#include "blktaplib.h"
-#if 1
+#if 0
#define DPRINTF(_f, _a...) printf ( _f , ## _a )
#else
#define DPRINTF(_f, _a...) ((void)0)
/*-----[ Data to/from Backend (server) VM ]------------------------------*/
+
+
inline int write_req_to_be_ring(blkif_request_t *req)
{
blkif_request_t *req_d;
+ static pthread_mutex_t be_prod_mutex = PTHREAD_MUTEX_INITIALIZER; /* one lock shared by every caller of this function */
- //req_d = FRONT_RING_NEXT_EMPTY_REQUEST(&be_ring);
+ pthread_mutex_lock(&be_prod_mutex); /* held across slot lookup, copy and the req_prod_pvt increment */
req_d = RING_GET_REQUEST(&be_ring, be_ring.req_prod_pvt);
memcpy(req_d, req, sizeof(blkif_request_t));
wmb();
be_ring.req_prod_pvt++;
+ pthread_mutex_unlock(&be_prod_mutex); /* slot is filled and the private producer index advanced */
return 0;
}
inline int write_rsp_to_fe_ring(blkif_response_t *rsp)
{
blkif_response_t *rsp_d;
+ static pthread_mutex_t fe_prod_mutex = PTHREAD_MUTEX_INITIALIZER; /* one lock shared by every caller of this function */
- //rsp_d = BACK_RING_NEXT_EMPTY_RESPONSE(&fe_ring);
+ pthread_mutex_lock(&fe_prod_mutex); /* held across slot lookup, copy and the rsp_prod_pvt increment */
rsp_d = RING_GET_RESPONSE(&fe_ring, fe_ring.rsp_prod_pvt);
memcpy(rsp_d, rsp, sizeof(blkif_response_t));
wmb();
fe_ring.rsp_prod_pvt++;
+ pthread_mutex_unlock(&fe_prod_mutex); /* slot is filled and the private producer index advanced */
return 0;
}
ctrl_sring_t *csring;
RING_IDX rp, i, pfd_count;
+ /* pending rings */
+ blkif_request_t req_pending[BLKIF_RING_SIZE];
+ blkif_response_t rsp_pending[BLKIF_RING_SIZE];
+
/* handler hooks: */
request_hook_t *req_hook;
response_hook_t *rsp_hook;
int done = 0; /* stop forwarding this request */
req = RING_GET_REQUEST(&fe_ring, i);
+ memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req));
+ req = &req_pending[ID_TO_IDX(req->id)];
DPRINTF("copying an fe request\n");
{
rsp = RING_GET_RESPONSE(&be_ring, i);
+ memcpy(&rsp_pending[ID_TO_IDX(rsp->id)], rsp, sizeof(*rsp));
+ rsp = &rsp_pending[ID_TO_IDX(rsp->id)];
DPRINTF("copying a be request\n");
--- /dev/null
+/**************************************************************************
+ *
+ * blockstore.c
+ *
+ * Simple block store interface
+ *
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "blockstore.h"
+#include "parallax-threaded.h"
+
+/*static int block_fp = -1;*/
+
+static int fd_list[READ_POOL_SIZE+1];
+
+/**
+ * readblock: fetch one block from the store into a fresh heap buffer
+ *   @id: block id; block N lives at byte offset (N - 1) * BLOCK_SIZE
+ *
+ *   The descriptor used is the fd_list entry selected by the value this
+ *   thread stored under tid_key.
+ *
+ *   @return: malloc()ed buffer holding BLOCK_SIZE bytes, NULL on error
+ */
+
+void *readblock(u64 id)
+{
+    int fd = fd_list[(int)pthread_getspecific(tid_key)];
+    void *buf;
+
+    if (lseek64(fd, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
+        printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
+        perror("readblock lseek");
+        return NULL;
+    }
+
+    buf = malloc(BLOCK_SIZE);
+    if (buf == NULL) {
+        perror("readblock malloc");
+        return NULL;
+    }
+
+    /* Anything other than a complete block counts as a failure. */
+    if (read(fd, buf, BLOCK_SIZE) != BLOCK_SIZE) {
+        perror("readblock read");
+        free(buf);
+        return NULL;
+    }
+
+    return buf;
+}
+
+/**
+ * writeblock: write an existing block to disk
+ *   @id: block id; block N lives at byte offset (N - 1) * BLOCK_SIZE
+ *   @block: pointer to BLOCK_SIZE bytes of data
+ *
+ *   The descriptor used is the fd_list entry selected by the value this
+ *   thread stored under tid_key.
+ *
+ *   @return: zero on success, -1 on failure
+ */
+int writeblock(u64 id, void *block)
+{
+    int tid = (int)pthread_getspecific(tid_key);
+
+    if (lseek64(fd_list[tid], ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
+        perror("writeblock lseek");
+        goto err;
+    }
+    /*
+     * A short write leaves a torn block on disk, so it must be reported as
+     * a failure too; this is the same test allocblock() applies.
+     */
+    if (write(fd_list[tid], block, BLOCK_SIZE) != BLOCK_SIZE) {
+        perror("writeblock write");
+        goto err;
+    }
+    return 0;
+
+err:
+    return -1;
+}
+
+/**
+ * allocblock: append a new block to the end of the store
+ *   @block: pointer to BLOCK_SIZE bytes of data
+ *
+ *   The descriptor used is the fd_list entry selected by the value this
+ *   thread stored under tid_key.
+ *
+ *   @return: id of the block just written, or 0 on failure
+ */
+
+u64 allocblock(void *block)
+{
+    int fd = fd_list[(int)pthread_getspecific(tid_key)];
+    off64_t end = lseek64(fd, 0, SEEK_END);
+
+    if (end == (off64_t)-1) {
+        perror("allocblock lseek");
+        return 0;
+    }
+
+    /* The store must consist of whole blocks for ids to map to offsets. */
+    if (end % BLOCK_SIZE != 0) {
+        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
+        return 0;
+    }
+
+    if (write(fd, block, BLOCK_SIZE) != BLOCK_SIZE) {
+        perror("allocblock write");
+        return 0;
+    }
+
+    /* ids start at 1: the block at offset 0 is block 1. */
+    return end / BLOCK_SIZE + 1;
+}
+
+
+/**
+ * newblock: obtain a zero-filled in-memory block of BLOCK_SIZE bytes
+ *
+ *   @return: pointer to the new block, NULL (after perror) on failure
+ */
+void *newblock()
+{
+    void *zeroed = calloc(1, BLOCK_SIZE);
+
+    if (zeroed == NULL)
+        perror("newblock");
+
+    return zeroed;
+}
+
+
+/**
+ * freeblock: release the memory of an in-memory block
+ *   @block: block to be freed; NULL is accepted and ignored
+ */
+void freeblock(void *block)
+{
+    /* free() already treats NULL as a no-op. */
+    free(block);
+}
+
+
+/**
+ * __init_blockstore: open one descriptor on "blockstore.dat" per fd_list slot
+ *
+ *   fd_list has READ_POOL_SIZE + 1 entries; each is indexed by the value a
+ *   thread keeps under tid_key.
+ *
+ *   @return: zero on success, -1 if any open() fails (nothing is left open)
+ */
+int __init_blockstore(void)
+{
+    int i;
+
+    for (i = 0; i < (READ_POOL_SIZE + 1); i++) {
+
+        fd_list[i] = open("blockstore.dat",
+                          O_RDWR | O_CREAT | O_LARGEFILE, 0644);
+
+        if (fd_list[i] < 0) {
+            /* Report first: close() below may overwrite errno. */
+            perror("open");
+            /* Do not leak the descriptors opened on earlier iterations. */
+            while (--i >= 0) {
+                close(fd_list[i]);
+                fd_list[i] = -1;
+            }
+            return -1;
+        }
+    }
+    return 0;
+}
#include <sys/types.h>
#include <sys/stat.h>
#include "blockstore.h"
+#include "parallax-threaded.h"
-static int block_fp = -1;
+/*static int block_fp = -1;*/
+
+static int fd_list[READ_POOL_SIZE+1];
/**
* readblock: read a block from disk
void *readblock(u64 id) {
void *block;
+ int block_fp; /* private descriptor, opened and closed inside this call */
+
+ block_fp = open("blockstore.dat", O_RDONLY | O_CREAT | O_LARGEFILE, 0644);
+
+ if (block_fp < 0) {
+ perror("open");
+ return NULL;
+ }
+
if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
+ printf ("%Ld ", id);
printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
perror("readblock lseek");
- return NULL;
+ goto err; /* every failure after open() leaves through err: so block_fp is closed */
}
if ((block = malloc(BLOCK_SIZE)) == NULL) {
perror("readblock malloc");
- return NULL;
+ goto err;
}
if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
perror("readblock read");
free(block);
- return NULL;
+ goto err;
}
+ close(block_fp); /* success path: release the descriptor before handing back the buffer */
return block;
+
+err:
+ close(block_fp);
+ return NULL;
}
/**
* @return: zero on success, -1 on failure
*/
int writeblock(u64 id, void *block) {
+
+ int block_fp; /* private descriptor, opened and closed inside this call */
+
+ block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
+
+ if (block_fp < 0) {
+ perror("open");
+ return -1;
+ }
+
if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
perror("writeblock lseek");
- return -1;
+ goto err; /* every failure after open() leaves through err: so block_fp is closed */
}
if (write(block_fp, block, BLOCK_SIZE) < 0) {
perror("writeblock write");
- return -1;
+ goto err; /* NOTE(review): only a negative result is treated as failure; a short write is not detected here */
}
+ close(block_fp);
return 0;
+
+err:
+ close(block_fp);
+ return -1;
}
/**
*
* @return: new id of block on disk
*/
-static u64 lastblock = 0;
u64 allocblock(void *block) {
u64 lb;
- off64_t pos = lseek64(block_fp, 0, SEEK_END);
+ off64_t pos;
+ int block_fp; /* private descriptor, opened and closed inside this call */
+
+ block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
+
+ if (block_fp < 0) {
+ perror("open");
+ return 0;
+ }
+
+ pos = lseek64(block_fp, 0, SEEK_END); /* current end of file is where the new block is written */
if (pos == (off64_t)-1) {
perror("allocblock lseek");
- return 0;
+ goto err; /* every failure after open() leaves through err: so block_fp is closed */
}
if (pos % BLOCK_SIZE != 0) {
fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
- return 0;
+ goto err;
}
if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
perror("allocblock write");
- return 0;
+ goto err;
}
lb = pos / BLOCK_SIZE + 1;
+//printf("alloc(%Ld)\n", lb);
+ close(block_fp);
+ return lb; /* ids are 1-based: offset 0 is block 1 */
- if (lb <= lastblock)
- printf("[*** %Ld alredy allocated! ***]\n", lb);
+err:
+ close(block_fp);
+ return 0; /* 0 is the failure value; no valid block has id 0 */
- lastblock = lb;
- return lb;
}
free(block);
}
+/*
+ * new_freeblock: build an empty in-memory freelist block.
+ *
+ * Returns a block obtained from newblock() with the magic set and the
+ * next/count/list fields cleared, or NULL if newblock() fails.
+ */
+static freeblock_t *new_freeblock(void)
+{
+    freeblock_t *fresh = newblock();
+
+    if (fresh != NULL) {
+        fresh->magic = FREEBLOCK_MAGIC;
+        fresh->next  = 0ULL;
+        fresh->count = 0ULL;
+        memset(fresh->list, 0, sizeof fresh->list);
+    }
+
+    return fresh;
+}
+
+/*
+ * releaseblock: record block @id on the on-disk freelist.
+ *
+ * The superblock names one "current" freelist block that ids are appended
+ * to, and a chain of full ones (freelist_full).  When the current block is
+ * full it is pushed onto that chain and a new current block is allocated.
+ *
+ * The function returns nothing; if the superblock or a freelist block
+ * cannot be read or allocated, a message is printed and @id is simply not
+ * recorded.
+ */
+void releaseblock(u64 id)
+{
+    blockstore_super_t *bs_super = NULL;
+    freeblock_t *fl_current = NULL;
+
+    /* get superblock */
+    bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
+    if (bs_super == NULL) {
+        fprintf(stderr, "releaseblock: cannot read superblock\n");
+        goto out;
+    }
+
+    /* get freeblock_current */
+    if (bs_super->freelist_current == 0ULL)
+    {
+        fl_current = new_freeblock();
+        if (fl_current == NULL)
+            goto out;
+        bs_super->freelist_current = allocblock(fl_current);
+        if (bs_super->freelist_current == 0ULL) {
+            /* allocblock() returns 0 on failure; never store that id. */
+            fprintf(stderr, "releaseblock: cannot allocate freelist block\n");
+            goto out;
+        }
+        writeblock(BLOCKSTORE_SUPER, bs_super);
+    } else {
+        fl_current = readblock(bs_super->freelist_current);
+        if (fl_current == NULL) {
+            fprintf(stderr, "releaseblock: cannot read freelist block\n");
+            goto out;
+        }
+    }
+
+    /* if full, chain to superblock and allocate new current */
+
+    if (fl_current->count == FREEBLOCK_SIZE) {
+        fl_current->next = bs_super->freelist_full;
+        writeblock(bs_super->freelist_current, fl_current);
+        bs_super->freelist_full = bs_super->freelist_current;
+        freeblock(fl_current);
+        fl_current = new_freeblock();
+        if (fl_current == NULL)
+            goto out;
+        bs_super->freelist_current = allocblock(fl_current);
+        if (bs_super->freelist_current == 0ULL) {
+            /* The superblock on disk is left untouched in this case. */
+            fprintf(stderr, "releaseblock: cannot allocate freelist block\n");
+            goto out;
+        }
+        writeblock(BLOCKSTORE_SUPER, bs_super);
+    }
+
+    /* append id to current */
+    fl_current->list[fl_current->count++] = id;
+    writeblock(bs_super->freelist_current, fl_current);
+
+out:
+    /* freeblock() accepts NULL, so this is safe on every path. */
+    freeblock(fl_current);
+    freeblock(bs_super);
+}
+
+/* freelist debug functions: */
+
+/*
+ * freelist_count: print how many block ids are on the freelist.
+ *
+ * Reads the superblock, then the current freelist block, then walks the
+ * freelist_full chain through each block's next field.  When print_each is
+ * 1, every id is printed as well.  All blocks read are released again
+ * before returning, including on the early-exit paths.
+ */
+void freelist_count(int print_each)
+{
+    blockstore_super_t *bs_super;
+    freeblock_t *fb;
+    u64 total = 0, next, i;
+
+    bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
+    if (bs_super == NULL) {
+        fprintf(stderr, "freelist_count: cannot read superblock\n");
+        return;
+    }
+
+    if (bs_super->freelist_current == 0ULL) {
+        printf("freelist is empty!\n");
+        goto out;
+    }
+
+    fb = readblock(bs_super->freelist_current);
+    if (fb == NULL) {
+        fprintf(stderr, "freelist_count: cannot read freelist block\n");
+        goto out;
+    }
+    printf("%Ld entires on current.\n", fb->count);
+    total += fb->count;
+    if (print_each == 1)
+    {
+        for (i = 0; i < fb->count; i++)
+            printf(" %Ld\n", fb->list[i]);
+    }
+
+    freeblock(fb);
+
+    if (bs_super->freelist_full == 0ULL) {
+        printf("freelist_full is empty!\n");
+        goto out;
+    }
+
+    /* Walk the chain of full blocks until a zero next pointer. */
+    for (next = bs_super->freelist_full; next != 0ULL; ) {
+        fb = readblock(next);
+        if (fb == NULL) {
+            fprintf(stderr, "freelist_count: cannot read freelist block\n");
+            goto out;
+        }
+        total += fb->count;
+        if (print_each == 1)
+        {
+            for (i = 0; i < fb->count; i++)
+                printf(" %Ld\n", fb->list[i]);
+        }
+        next = fb->next;
+        freeblock(fb);
+    }
+    printf("Total of %Ld ids on freelist.\n", total);
+
+out:
+    freeblock(bs_super);
+}
int __init_blockstore(void)
{
+ int i;
+ blockstore_super_t *bs_super;
+ u64 ret;
+ int block_fp;
+
block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
if (block_fp < 0) {
perror("open");
- return -1;
+ exit(-1);
}
+ if (lseek(block_fp, 0, SEEK_END) == 0) {
+ bs_super = newblock();
+ bs_super->magic = BLOCKSTORE_MAGIC;
+ bs_super->freelist_full = 0LL;
+ bs_super->freelist_current = 0LL;
+
+ ret = allocblock(bs_super);
+
+ freeblock(bs_super);
+ } else {
+ bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
+ if (bs_super->magic != BLOCKSTORE_MAGIC)
+ {
+ printf("BLOCKSTORE IS CORRUPT! (no magic in superblock!)\n");
+ exit(-1);
+ }
+ freeblock(bs_super);
+ }
+
+ close(block_fp);
+
+
+ /*
+ for (i=0; i<(READ_POOL_SIZE+1); i++) {
+
+ fd_list[i] = open("blockstore.dat",
+ O_RDWR | O_CREAT | O_LARGEFILE, 0644);
+
+ if (fd_list[i] < 0) {
+ perror("open");
+ return -1;
+ }
+ }
+ */
return 0;
}
#define SECTOR_SHIFT 9
#endif
+/*
+ * Number of block ids held by one freelist block.
+ *
+ * The expansion is fully parenthesized so the macro can be used safely
+ * inside larger expressions.
+ *
+ * NOTE(review): the subtrahend is 3 * sizeof(u64) entries rather than the
+ * 3 header words (magic, next, count), so the structure ends up smaller
+ * than BLOCK_SIZE.  The value is kept as-is because it fixes the on-disk
+ * layout; confirm whether "- 3" was intended.
+ */
+#define FREEBLOCK_SIZE ((BLOCK_SIZE / sizeof(u64)) - (3 * sizeof(u64)))
+/* ULL: the constant does not fit in a 32-bit long. */
+#define FREEBLOCK_MAGIC 0x0fee0fee0fee0feeULL
+
+/* One block of the freelist as it is stored in the blockstore. */
+typedef struct {
+    u64 magic;                 /* FREEBLOCK_MAGIC */
+    u64 next;                  /* id of the next block in a chain, 0 = end */
+    u64 count;                 /* number of valid entries in list[] */
+    u64 list[FREEBLOCK_SIZE];  /* released block ids */
+} freeblock_t;
+
+/* ULL: the constant does not fit in a 32-bit long. */
+#define BLOCKSTORE_MAGIC 0xaaaaaaa00aaaaaaaULL
+/* Block id at which the superblock is stored. */
+#define BLOCKSTORE_SUPER 1ULL
+
+/* Superblock: roots of the freelist. */
+typedef struct {
+    u64 magic;             /* BLOCKSTORE_MAGIC */
+    u64 freelist_full;     /* head of the chain of full freelist blocks, 0 = none */
+    u64 freelist_current;  /* freelist block being appended to, 0 = none */
+} blockstore_super_t;
extern void *newblock();
extern void *readblock(u64 id);
extern u64 allocblock(void *block);
extern int writeblock(u64 id, void *block);
+
+/* Add this blockid to a freelist, to be recycled by the allocator. */
+extern void releaseblock(u64 id);
+
+/* this is a memory free() operation for block-sized allocations */
extern void freeblock(void *block);
extern int __init_blockstore(void);
+/* debug for freelist. */
+void freelist_count(int print_each);
+
#endif /* __BLOCKSTORE_H__ */
--- /dev/null
+/**************************************************************************
+ *
+ * parallax.c
+ *
+ * The Parallax Storage Server
+ *
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include "blktaplib.h"
+#include "blockstore.h"
+#include "vdi.h"
+#include "parallax-threaded.h"
+
+#define PARALLAX_DEV 61440
+
+
+#if 0
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+/* ------[ session records ]----------------------------------------------- */
+
+#define BLKIF_HASHSZ 1024
+#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
+
+#define VDI_HASHSZ 16
+#define VDI_HASH(_vd) ((((_vd)>>8)^(_vd))&(VDI_HASHSZ-1))
+
+typedef struct blkif {
+ domid_t domid; /* with handle, the key matched by blkif_find_by_handle() */
+ unsigned int handle; /* second half of the lookup key */
+ enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; /* blkif_destroy() refuses anything but DISCONNECTED */
+ vdi_t *vdi_hash[VDI_HASHSZ]; /* buckets of attached VDIs, indexed by VDI_HASH(vdevice) */
+ struct blkif *hash_next; /* next entry in the same blkif_hash bucket */
+} blkif_t;
+
+static blkif_t *blkif_hash[BLKIF_HASHSZ];
+
+/*
+ * blkif_find_by_handle: look up the interface record for (domid, handle).
+ *
+ * Walks the blkif_hash bucket chosen by BLKIF_HASH and returns the first
+ * entry whose domid and handle both match, or NULL if there is none.  A
+ * non-zero handle only triggers a warning; the lookup still proceeds.
+ */
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
+{
+    blkif_t *cur;
+
+    if ( handle != 0 )
+        printf("blktap/parallax don't currently support non-0 dev handles!\n");
+
+    for ( cur = blkif_hash[BLKIF_HASH(domid, handle)];
+          cur != NULL;
+          cur = cur->hash_next )
+    {
+        if ( (cur->domid == domid) && (cur->handle == handle) )
+            return cur;
+    }
+
+    return NULL;
+}
+
+/*
+ * blkif_get_vdi: find the VDI attached to blkif as virtual device 'device'.
+ *
+ * Searches the vdi_hash bucket chosen by VDI_HASH; returns NULL when no
+ * entry in that bucket carries the requested vdevice.
+ */
+vdi_t *blkif_get_vdi(blkif_t *blkif, blkif_vdev_t device)
+{
+    vdi_t *cur;
+
+    for ( cur = blkif->vdi_hash[VDI_HASH(device)];
+          cur != NULL;
+          cur = cur->next )
+    {
+        if ( cur->vdevice == device )
+            break;
+    }
+
+    return cur;
+}
+
+/* ------[ control message handling ]-------------------------------------- */
+
+/*
+ * blkif_create: handle a "create interface" control message.
+ *
+ * Allocates a zeroed, DISCONNECTED record for (domid, blkif_handle) and
+ * appends it to the end of its blkif_hash bucket.  The outcome is reported
+ * through create->status: OUT_OF_MEMORY, INTERFACE_EXISTS or OKAY.
+ */
+void blkif_create(blkif_be_create_t *create)
+{
+    domid_t domid = create->domid;
+    unsigned int handle = create->blkif_handle;
+    blkif_t **slot;
+    blkif_t *fresh;
+
+    DPRINTF("parallax (blkif_create): create is %p\n", create);
+
+    fresh = (blkif_t *)calloc(1, sizeof(blkif_t));
+    if ( fresh == NULL )
+    {
+        DPRINTF("Could not create blkif: out of memory\n");
+        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+        return;
+    }
+
+    fresh->domid  = domid;
+    fresh->handle = handle;
+    fresh->status = DISCONNECTED;
+
+    /* Walk to the end of the bucket, refusing duplicates on the way. */
+    for ( slot = &blkif_hash[BLKIF_HASH(domid, handle)];
+          *slot != NULL;
+          slot = &(*slot)->hash_next )
+    {
+        if ( ((*slot)->domid == domid) && ((*slot)->handle == handle) )
+        {
+            DPRINTF("Could not create blkif: already exists\n");
+            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
+            free(fresh);
+            return;
+        }
+    }
+
+    fresh->hash_next = *slot;
+    *slot = fresh;
+
+    DPRINTF("Successfully created blkif\n");
+    create->status = BLKIF_BE_STATUS_OKAY;
+}
+
+/*
+ * blkif_destroy: handle a "destroy interface" control message.
+ *
+ * Unlinks and frees the record for (domid, blkif_handle) if it exists and
+ * is DISCONNECTED.  The outcome is reported through destroy->status:
+ * INTERFACE_NOT_FOUND, INTERFACE_CONNECTED or OKAY.
+ */
+void blkif_destroy(blkif_be_destroy_t *destroy)
+{
+    domid_t domid = destroy->domid;
+    unsigned int handle = destroy->blkif_handle;
+    blkif_t **link;
+    blkif_t *victim;
+
+    DPRINTF("parallax (blkif_destroy): destroy is %p\n", destroy);
+
+    for ( link = &blkif_hash[BLKIF_HASH(domid, handle)];
+          (victim = *link) != NULL;
+          link = &victim->hash_next )
+    {
+        if ( (victim->domid != domid) || (victim->handle != handle) )
+            continue;
+
+        if ( victim->status != DISCONNECTED )
+        {
+            destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+            return;
+        }
+
+        /* Splice the record out of its bucket before freeing it. */
+        *link = victim->hash_next;
+        /* destroy_all_vbds(blkif); */
+        free(victim);
+        destroy->status = BLKIF_BE_STATUS_OKAY;
+        return;
+    }
+
+    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+}
+
+/*
+ * vbd_grow: handle a "grow VBD" control message by attaching a VDI.
+ *
+ * The VDI id is carried in grow->extent.sector_start.  The VDI returned by
+ * vdi_get() is tagged with grow->vdevice and appended to the matching
+ * vdi_hash bucket of the interface.  The outcome is reported through
+ * grow->status: INTERFACE_NOT_FOUND, VBD_NOT_FOUND or OKAY.
+ */
+void vbd_grow(blkif_be_vbd_grow_t *grow)
+{
+    blkif_vdev_t vdevice = grow->vdevice;
+    blkif_t *owner;
+    vdi_t *vdi, **tail;
+
+    DPRINTF("parallax (vbd_grow): grow=%p\n", grow);
+
+    owner = blkif_find_by_handle(grow->domid, grow->blkif_handle);
+    if ( owner == NULL )
+    {
+        DPRINTF("vbd_grow attempted for non-existent blkif (%u,%u)\n",
+                grow->domid, grow->blkif_handle);
+        grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+        return;
+    }
+
+    /* VDI identifier is in grow->extent.sector_start */
+    DPRINTF("vbd_grow: grow->extent.sector_start (id) is %llx\n",
+            grow->extent.sector_start);
+
+    vdi = vdi_get(grow->extent.sector_start);
+    if ( vdi == NULL )
+    {
+        printf("parallax (vbd_grow): VDI %llx not found.\n",
+               grow->extent.sector_start);
+        grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
+        return;
+    }
+
+    vdi->next = NULL;
+    vdi->vdevice = vdevice;
+
+    /* Append at the tail of the bucket. */
+    for ( tail = &owner->vdi_hash[VDI_HASH(vdevice)];
+          *tail != NULL;
+          tail = &(*tail)->next )
+        ;
+    *tail = vdi;
+
+    DPRINTF("vbd_grow: happy return!\n");
+    grow->status = BLKIF_BE_STATUS_OKAY;
+}
+
+/*
+ * parallax_control: control-message hook.
+ *
+ * Dispatches CMSG_BLKIF_BE create/destroy/vbd_grow messages to their
+ * handlers after checking that msg->length matches the payload structure.
+ * Other subtypes are ignored.  Always returns 0.
+ */
+int parallax_control(control_msg_t *msg)
+{
+    int well_formed = 1;
+
+    DPRINTF("parallax_control: msg is %p\n", msg);
+
+    if (msg->type != CMSG_BLKIF_BE)
+    {
+        printf("Unexpected control message (%d)\n", msg->type);
+        return 0;
+    }
+
+    switch (msg->subtype)
+    {
+    case CMSG_BLKIF_BE_CREATE:
+        well_formed = (msg->length == sizeof(blkif_be_create_t));
+        if (well_formed)
+            blkif_create((blkif_be_create_t *)msg->msg);
+        break;
+
+    case CMSG_BLKIF_BE_DESTROY:
+        well_formed = (msg->length == sizeof(blkif_be_destroy_t));
+        if (well_formed)
+            blkif_destroy((blkif_be_destroy_t *)msg->msg);
+        break;
+
+    case CMSG_BLKIF_BE_VBD_GROW:
+        well_formed = (msg->length == sizeof(blkif_be_vbd_grow_t));
+        if (well_formed)
+            vbd_grow((blkif_be_vbd_grow_t *)msg->msg);
+        break;
+    }
+
+    if (!well_formed)
+        printf("Bad control message!\n");
+
+    return 0;
+}
+
+/*
+ * parallax_probe: answer a BLKIF_OP_PROBE request.
+ *
+ * The request must carry exactly one segment covering sectors 0..7.  One
+ * vdisk_t entry per VDI attached to blkif is written into the page mapped
+ * for that segment, and the request is rewritten in place as the response
+ * whose status is the number of entries (or BLKIF_RSP_ERROR for a
+ * malformed request).  Always returns BLKTAP_RESPOND.
+ */
+int parallax_probe(blkif_request_t *req, blkif_t *blkif)
+{
+    blkif_response_t *rsp = (blkif_response_t *)req;
+    vdisk_t *img_info;
+    vdi_t *vdi;
+    int bucket, nr_vdis = 0;
+    int well_formed;
+
+    DPRINTF("parallax_probe: req=%p, blkif=%p\n", req, blkif);
+
+    /* We expect one buffer only, and it must be page-sized. */
+    well_formed = (req->nr_segments == 1) &&
+                  (blkif_first_sect(req->frame_and_sects[0]) == 0) &&
+                  (blkif_last_sect (req->frame_and_sects[0]) == 7);
+
+    if ( !well_formed )
+    {
+        rsp->id = req->id;
+        rsp->operation = BLKIF_OP_PROBE;
+        rsp->status = BLKIF_RSP_ERROR;
+
+        DPRINTF("parallax_probe: send error response\n");
+        return BLKTAP_RESPOND;
+    }
+
+    /* fill the list of devices */
+    for (bucket = 0; bucket < VDI_HASHSZ; bucket++) {
+        for (vdi = blkif->vdi_hash[bucket]; vdi != NULL; vdi = vdi->next) {
+            img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
+            img_info[nr_vdis].device = vdi->vdevice;
+            img_info[nr_vdis].info = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
+            /* The -2 here accounts for the LSB in the radix tree */
+            img_info[nr_vdis].capacity =
+                ((1LL << (VDI_HEIGHT-2)) >> SECTOR_SHIFT);
+            nr_vdis++;
+        }
+    }
+
+    rsp->id = req->id;
+    rsp->operation = BLKIF_OP_PROBE;
+    rsp->status = nr_vdis; /* number of disks */
+
+    DPRINTF("parallax_probe: send positive response (nr_vdis=%d)\n", nr_vdis);
+    return BLKTAP_RESPOND;
+}
+
+/* Bookkeeping for one read request whose segments are being serviced. */
+typedef struct {
+    blkif_request_t *req;
+    int              count;  /* segments of req not yet finished */
+    int              error;  /* set once any segment of req has failed */
+    pthread_mutex_t  mutex;  /* protects count and error */
+} pending_t;
+
+#define MAX_REQUESTS 64
+pending_t pending_list[MAX_REQUESTS];
+
+/* One unit of work for the read pool: a single segment of a request. */
+typedef struct {
+    vdi_t           *vdi;
+    blkif_request_t *req;
+    int              segment;
+    pending_t       *pent;
+} readseg_params_t;
+
+/* Ring of queued segments: dprod is the producer index, dcons the consumer. */
+#define DISPATCH_SIZE 1024UL
+#define DISPATCH_MASK (DISPATCH_SIZE-1)
+readseg_params_t dispatch_list[DISPATCH_SIZE];
+unsigned long dprod = 0, dcons = 0;
+pthread_mutex_t dispatch_mutex;
+pthread_cond_t dispatch_cond;
+
+/*
+ * read_one_segment: service a single queued segment.
+ *
+ * Copies the requested sectors from the store (or zeros, if the block is
+ * unmapped) into the guest page, then updates the pending record.  The
+ * worker that finishes the last segment of a request turns the request
+ * into its response and injects it.  A store read failure is remembered in
+ * the pending record so that the request still completes, with
+ * BLKIF_RSP_ERROR, instead of never being answered.
+ */
+static void read_one_segment(readseg_params_t *p)
+{
+    u64 vblock, gblock, sector;
+    char *dpage, *spage;
+    unsigned long size, start, offset;
+    blkif_response_t *rsp;
+    int failed = 0;
+
+    dpage = (char *)MMAP_VADDR(ID_TO_IDX(p->req->id), p->segment);
+
+    /* Round the requested segment to a block address. */
+
+    sector = p->req->sector_number + (8*p->segment);
+    vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
+
+    /* Get that block from the store. */
+
+    gblock = vdi_lookup_block(p->vdi, vblock, NULL);
+
+    /* Calculate read size and offset within the read block. */
+
+    offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
+    size = ( blkif_last_sect (p->req->frame_and_sects[p->segment]) -
+             blkif_first_sect(p->req->frame_and_sects[p->segment]) + 1
+           ) << SECTOR_SHIFT;
+    start = blkif_first_sect(p->req->frame_and_sects[p->segment])
+            << SECTOR_SHIFT;
+
+    DPRINTF("ParallaxRead: sect: %lld (%ld,%ld), "
+            "vblock %llx, gblock %llx, "
+            "size %lx\n",
+            sector, blkif_first_sect(p->req->frame_and_sects[p->segment]),
+            blkif_last_sect (p->req->frame_and_sects[p->segment]),
+            vblock, gblock, size);
+
+    /* If the block does not exist in the store, return zeros. */
+    /* Otherwise, copy that region to the guest page. */
+
+    if ( gblock == 0 ) {
+
+        memset(dpage + start, '\0', size);
+
+    } else {
+
+        spage = readblock(gblock);
+
+        if (spage == NULL) {
+            printf("Error reading gblock from store: %Ld\n", gblock);
+            printf("I am screwed!\n");
+            failed = 1;
+        } else {
+            memcpy(dpage + start, spage + offset, size);
+            freeblock(spage);
+        }
+    }
+
+    /* Done the read. Now update the pending record. */
+
+    pthread_mutex_lock(&p->pent->mutex);
+
+    if (failed)
+        p->pent->error = 1;
+
+    p->pent->count--;
+
+    if (p->pent->count == 0) {
+
+        rsp = (blkif_response_t *)p->req;
+        rsp->id = p->req->id;
+        rsp->operation = BLKIF_OP_READ;
+        rsp->status = p->pent->error ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
+
+        /* Leave the record clean for the next request that uses it. */
+        p->pent->error = 0;
+
+        blktap_inject_response(rsp);
+    }
+
+    pthread_mutex_unlock(&p->pent->mutex);
+}
+
+/*
+ * read_segment: read-pool worker.
+ *
+ * Threaded build: param points to a malloc()ed int holding this worker's
+ * id, which is stored under tid_key and then freed.  The worker then loops
+ * forever, sleeping on dispatch_cond until dispatch_list is non-empty and
+ * servicing one entry at a time.
+ *
+ * NOTHREADS build: param is ignored; exactly one queued entry is serviced
+ * and NULL is returned.
+ */
+void *read_segment(void *param)
+{
+    readseg_params_t seg;
+#ifndef NOTHREADS
+    int tid;
+
+    /* Set this thread's tid. */
+    tid = *(int *)param;
+    free(param);
+
+    pthread_setspecific(tid_key, (void *)(long)tid);
+
+    printf("My tid is %d.\n", (int)(long)pthread_getspecific(tid_key));
+
+    for (;;) {
+        pthread_mutex_lock(&dispatch_mutex);
+        /* The loop re-checks the predicate, covering spurious wake-ups. */
+        while (dprod == dcons)
+            pthread_cond_wait(&dispatch_cond, &dispatch_mutex);
+
+        /* Copy the entry out while the lock is still held. */
+        seg = dispatch_list[dcons & DISPATCH_MASK];
+        dcons++;
+        pthread_mutex_unlock(&dispatch_mutex);
+
+        read_one_segment(&seg);
+    }
+#else
+    (void)param;
+
+    seg = dispatch_list[dcons & DISPATCH_MASK];
+    dcons++;
+
+    read_one_segment(&seg);
+#endif
+
+    return NULL;
+}
+
+
+/*
+ * parallax_read: start servicing a BLKIF_OP_READ request.
+ *
+ * One dispatch_list entry is queued per segment, and the pending record
+ * for the request is primed with the segment count.  The response is sent
+ * later by whichever read_segment() call finishes the last segment, so
+ * the request is reported as BLKTAP_STOLEN.
+ *
+ * Returns BLKTAP_RESPOND, with the request rewritten as the response, when
+ * the device is unknown (error) or the request has no segments (okay).
+ */
+int parallax_read(blkif_request_t *req, blkif_t *blkif)
+{
+    blkif_response_t *rsp;
+    vdi_t *vdi;
+    int i, nr_segments;
+    pending_t *pent;
+    readseg_params_t *params;
+
+    vdi = blkif_get_vdi(blkif, req->device);
+
+    if ( vdi == NULL )
+        goto err;
+
+    /*
+     * Take a private copy: once the last segment is queued a worker may
+     * finish and overwrite *req with the response while this loop is
+     * still evaluating its condition.
+     */
+    nr_segments = req->nr_segments;
+
+    if ( nr_segments == 0 ) {
+        /* Nothing to queue, so no worker would ever complete this. */
+        rsp = (blkif_response_t *)req;
+        rsp->id = req->id;
+        rsp->operation = BLKIF_OP_READ;
+        rsp->status = BLKIF_RSP_OKAY;
+
+        return BLKTAP_RESPOND;
+    }
+
+    pent = &pending_list[ID_TO_IDX(req->id)];
+    pent->count = nr_segments;
+    pent->req = req;
+    pthread_mutex_init(&pent->mutex, NULL);
+
+    for (i = 0; i < nr_segments; i++) {
+
+        /*
+         * dprod and the slot it names are shared with the workers, which
+         * only look at them with dispatch_mutex held; publish under the
+         * same lock.
+         */
+        pthread_mutex_lock(&dispatch_mutex);
+        params = &dispatch_list[dprod & DISPATCH_MASK];
+        params->pent = pent;
+        params->vdi = vdi;
+        params->req = req;
+        params->segment = i;
+        dprod++;
+        pthread_cond_signal(&dispatch_cond);
+        pthread_mutex_unlock(&dispatch_mutex);
+#ifdef NOTHREADS
+        read_segment(NULL);
+#endif
+
+    }
+
+    return BLKTAP_STOLEN;
+
+err:
+    rsp = (blkif_response_t *)req;
+    rsp->id = req->id;
+    rsp->operation = BLKIF_OP_READ;
+    rsp->status = BLKIF_RSP_ERROR;
+
+    return BLKTAP_RESPOND;
+}
+
+/*
+ * parallax_write: service a BLKIF_OP_WRITE request synchronously.
+ *
+ * Every segment must be exactly one aligned block.  A block that is
+ * unmapped or not writable is written to a newly allocated store block and
+ * the VDI mapping is updated; otherwise the existing block is overwritten
+ * in place.  The request is rewritten as the response, with status
+ * BLKIF_RSP_OKAY, or BLKIF_RSP_ERROR if the device is unknown, a segment
+ * is not block-sized, or the store write fails.  Always returns
+ * BLKTAP_RESPOND.
+ */
+int parallax_write(blkif_request_t *req, blkif_t *blkif)
+{
+    blkif_response_t *rsp;
+    u64 sector;
+    int i, writable = 0;
+    u64 vblock, gblock;
+    char *spage;
+    unsigned long size, offset, start;
+    vdi_t *vdi;
+
+    vdi = blkif_get_vdi(blkif, req->device);
+
+    if ( vdi == NULL )
+        goto err;
+
+    for (i = 0; i < req->nr_segments; i++) {
+
+        spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
+
+        /* Round the requested segment to a block address. */
+
+        sector = req->sector_number + (8*i);
+        vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
+
+        /* Get that block from the store. */
+
+        gblock = vdi_lookup_block(vdi, vblock, &writable);
+
+        /* Calculate write size and offset within the block. */
+
+        offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
+        size = ( blkif_last_sect (req->frame_and_sects[i]) -
+                 blkif_first_sect(req->frame_and_sects[i]) + 1
+               ) << SECTOR_SHIFT;
+        start = blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
+
+        DPRINTF("ParallaxWrite: sect: %lld (%ld,%ld), "
+                "vblock %llx, gblock %llx, "
+                "size %lx\n",
+                sector, blkif_first_sect(req->frame_and_sects[i]),
+                blkif_last_sect (req->frame_and_sects[i]),
+                vblock, gblock, size);
+
+        /* XXX: For now we just freak out if they try to write a */
+        /* non block-sized, block-aligned page. */
+
+        if ((offset != 0) || (size != BLOCK_SIZE) || (start != 0)) {
+            printf("]\n] STRANGE WRITE!\n]\n");
+            goto err;
+        }
+
+        if (( gblock == 0 ) || ( writable == 0 )) {
+
+            gblock = allocblock(spage);
+            if ( gblock == 0 ) {
+                /* 0 means the store write failed; mapping the block to */
+                /* id 0 would make it read back as zeros.               */
+                printf("parallax_write: allocblock failed\n");
+                goto err;
+            }
+            vdi_update_block(vdi, vblock, gblock);
+
+        } else {
+
+            /* write-in-place, no need to change mappings. */
+            if ( writeblock(gblock, spage) < 0 ) {
+                printf("parallax_write: writeblock failed\n");
+                goto err;
+            }
+
+        }
+
+    }
+
+    rsp = (blkif_response_t *)req;
+    rsp->id = req->id;
+    rsp->operation = BLKIF_OP_WRITE;
+    rsp->status = BLKIF_RSP_OKAY;
+
+    return BLKTAP_RESPOND;
+err:
+    rsp = (blkif_response_t *)req;
+    rsp->id = req->id;
+    rsp->operation = BLKIF_OP_WRITE;
+    rsp->status = BLKIF_RSP_ERROR;
+
+    return BLKTAP_RESPOND;
+}
+
+/*
+ * parallax_request: request hook.
+ *
+ * Finds the interface for the domain encoded in req->id (handle 0) and
+ * hands the request to the probe/read/write handler matching
+ * req->operation.  If there is no such interface, or the operation is not
+ * one of those three, the request is rewritten as an error response and
+ * BLKTAP_RESPOND is returned.
+ */
+int parallax_request(blkif_request_t *req)
+{
+    blkif_response_t *rsp;
+    domid_t dom = ID_TO_DOM(req->id);
+    blkif_t *blkif = blkif_find_by_handle(dom, 0);
+
+    if (blkif != NULL) {
+        switch (req->operation) {
+        case BLKIF_OP_PROBE:
+            return parallax_probe(req, blkif);
+        case BLKIF_OP_READ:
+            return parallax_read(req, blkif);
+        case BLKIF_OP_WRITE:
+            return parallax_write(req, blkif);
+        default:
+            /* Unknown operation */
+            break;
+        }
+    }
+
+    rsp = (blkif_response_t *)req;
+    rsp->id = req->id;
+    rsp->operation = req->operation;
+    rsp->status = BLKIF_RSP_ERROR;
+    return BLKTAP_RESPOND;
+}
+
+/* __init_parallax: start with every blkif_hash bucket empty. */
+void __init_parallax(void)
+{
+    int bucket;
+
+    for (bucket = 0; bucket < BLKIF_HASHSZ; bucket++)
+        blkif_hash[bucket] = NULL;
+}
+
+
+
+/*
+ * main: initialise the blockstore, VDI registry and local state, start the
+ * pool of read_segment() workers, register the control and request hooks
+ * and enter blktap_listen().
+ *
+ * Each worker is handed a heap-allocated int holding its id (0, 1, ...),
+ * which it frees itself; the main thread stores the next unused id under
+ * tid_key.
+ */
+int main(int argc, char *argv[])
+{
+    pthread_t read_pool[READ_POOL_SIZE];
+    int i, tid=0;
+
+    DPRINTF("parallax: starting.\n");
+    if (__init_blockstore() != 0) {
+        printf("parallax: could not initialise the blockstore.\n");
+        return 1;
+    }
+    DPRINTF("parallax: initialized blockstore...\n");
+    __init_vdi();
+    DPRINTF("parallax: initialized vdi registry etc...\n");
+    __init_parallax();
+    DPRINTF("parallax: initialized local stuff..\n");
+
+
+    pthread_mutex_init(&dispatch_mutex, NULL);
+    pthread_cond_init(&dispatch_cond, NULL);
+
+    pthread_key_create(&tid_key, NULL);
+    tid = 0;
+
+#ifdef NOTHREADS
+#else
+    for (i=0; i < READ_POOL_SIZE; i++) {
+        int ret, *t;
+        t = (int *)malloc(sizeof(int));
+        if (t == NULL) {
+            /* No id to hand over, so this worker cannot be started. */
+            printf("Error starting thread %d\n", i);
+            continue;
+        }
+        *t = tid++;
+        ret = pthread_create(&read_pool[i], NULL, read_segment, t);
+        if (ret != 0) {
+            printf("Error starting thread %d\n", i);
+            /* The worker never ran, so nobody else will free its id. */
+            free(t);
+        }
+    }
+#endif
+
+    pthread_setspecific(tid_key, (void *)(long)tid);
+
+    printf("*My tid is %d.\n", (int)(long)pthread_getspecific(tid_key));
+
+    blktap_register_ctrl_hook("parallax_control", parallax_control);
+    blktap_register_request_hook("parallax_request", parallax_request);
+    DPRINTF("parallax: added ctrl + request hooks, starting listen...\n");
+    blktap_listen();
+
+    return 0;
+}
--- /dev/null
+/**************************************************************************
+ *
+ * parallax-threaded.h
+ *
+ * a few thread-specific defines
+ *
+ */
+
+#ifndef __PARALLAX_THREADED_H__
+#define __PARALLAX_THREADED_H__
+
+/* pthread_key_t is used below; include its header so this file is */
+/* usable no matter what the including file pulled in before it.   */
+#include <pthread.h>
+
+#if 0
+/* Turn off threading. */
+#define NOTHREADS
+#endif
+
+/* Number of read_segment() worker threads started by main(). */
+#define READ_POOL_SIZE 128
+
+/*
+ * per-thread identifier
+ *
+ * NOTE(review): this is a definition rather than an extern declaration, so
+ * every source file that includes this header gets its own tentative
+ * definition of tid_key.  Consider "extern pthread_key_t tid_key;" here
+ * with a single definition in one .c file.
+ */
+pthread_key_t tid_key;
+
+#endif /* __PARALLAX_THREADED_H__ */
+
#define PARALLAX_DEV 61440
-#if 1
+#if 0
#define DPRINTF(_f, _a...) printf ( _f , ## _a )
#else
#define DPRINTF(_f, _a...) ((void)0)
rsp = (blkif_response_t *)req;
rsp->id = req->id;
- rsp->operation = BLKIF_OP_WRITE;
+ rsp->operation = BLKIF_OP_READ;
rsp->status = BLKIF_RSP_OKAY;
return BLKTAP_RESPOND;
err:
rsp = (blkif_response_t *)req;
rsp->id = req->id;
- rsp->operation = BLKIF_OP_WRITE;
+ rsp->operation = BLKIF_OP_READ;
rsp->status = BLKIF_RSP_ERROR;
return BLKTAP_RESPOND;
return writable(root);
}
-void print_root(u64 root, int height, u64 val, FILE *dot_f)
+/**
+ * collapse: collapse a parent onto a child.
+ *
+ * NOTE: This assumes that parent and child really are, and further that
+ * there are no other children forked from this parent. (children of the
+ * child are okay...)
+ *
+ * Walks the parent's radix node at proot alongside the child's node at
+ * croot.  Every link that differs between the two and is writable in the
+ * parent is collapsed recursively; afterwards the parent's node block is
+ * released if both proot and croot are writable.
+ *
+ * @height: height of the tree rooted at proot/croot; 0 means the nodes are
+ *          at the bottom level and no further recursion takes place.
+ * @proot:  root of the parent (the tree being removed).
+ * @croot:  root of the child (the tree that survives).
+ *
+ * Returns the number of node blocks released, or -1 if either root is ZERO,
+ * a node could not be read, or any recursive collapse failed.
+ */
+
+int collapse(int height, u64 proot, u64 croot)
+{
+    int i, numlinks, ret, total = 0;
+    int failed = 0;
+    radix_tree_node pnode, cnode;
+
+    if (height == 0) {
+        height = -1; /* terminate recursion */
+    } else {
+        height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
+    }
+    numlinks = (1UL << RADIX_TREE_MAP_SHIFT);
+
+    /* Terminal cases: */
+
+    if ( (getid(proot) == ZERO) || (getid(croot) == ZERO) )
+        return -1;
+
+    /* get roots */
+    if ((pnode = readblock(getid(proot))) == NULL)
+        return -1;
+
+    if ((cnode = readblock(getid(croot))) == NULL)
+    {
+        freeblock(pnode);
+        return -1;
+    }
+
+    /* For each writable link in proot */
+    for (i = 0; i < numlinks; i++)
+    {
+        /* links shared by parent and child stay in use by the child */
+        if ( pnode[i] == cnode[i] ) continue;
+
+        /* collapse (next level) */
+        /* if height != 0 and writable... */
+        if ( ( height >= 0 ) && ( iswritable(pnode[i]) ) )
+        {
+            ret = collapse(height, pnode[i], cnode[i]);
+            if (ret == -1)
+            {
+                /* remember the failure; later successes must not hide it */
+                failed = 1;
+            } else {
+                total += ret;
+            }
+        }
+    }
+
+    /* the node copies are no longer needed past this point */
+    freeblock(pnode);
+    freeblock(cnode);
+
+    /* if plink is writable, AND clink is writable -> free plink block */
+    if ( ( iswritable(proot) ) && ( iswritable(croot) ) )
+    {
+        releaseblock(getid(proot));
+        total++;
+    }
+
+    return failed ? -1 : total;
+}
+
+
+void print_root(u64 root, int height, FILE *dot_f)
{
FILE *f;
int i;
getid(root), style[iswritable(root)], getid(root));
}
- /* base case--return val */
+ printf("print_root(%Ld)\n", getid(root));
+
+ /* base case */
if (height == 0) {
/* add a node and edge for each child root */
node = (radix_tree_node) readblock(getid(root));
return;
for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++) {
- if (node[i] != 0) {
+ if (node[i] != ZERO) {
fprintf(f, " n%Ld [%sshape=box,label=\"%Ld\"];\n",
getid(node[i]), style[iswritable(node[i])],
getid(node[i]));
getid(node[i]), i);
}
}
+ freeblock(node);
return;
}
/* add a node and edge for each child root */
for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++)
- if (node[i] != 0) {
+ if (node[i] != ZERO) {
fprintf(f, " n%Ld [%sshape=box,label=\"%Ld\"];\n",
getid(node[i]), style[iswritable(node[i])],
getid(node[i]));
- print_root(node[i], height-RADIX_TREE_MAP_SHIFT,
- val + (((u64)i)<<height), f);
+
+ print_root(node[i], height-RADIX_TREE_MAP_SHIFT, f);
fprintf(f, " n%Ld -> n%Ld [label=\"%d\"]\n", getid(root),
getid(node[i]), i);
}
-
- /*
-
- root = node[(key >> height) & RADIX_TREE_MAP_MASK];
- freeblock(state, getid(oldroot), node);
-
- if (height == 0)
- return root;
-
- height -= RADIX_TREE_MAP_SHIFT;
- */
- //}
+ freeblock(node);
/* write graph postamble */
if (dot_f == NULL) {
int main(int argc, char **argv) {
u64 key = ZERO, val = ZERO;
- u64 root = writable(ONE);
+ u64 root = writable(2ULL);
+ u64 p = ZERO, c = ZERO;
+ int v;
char buff[4096];
__init_blockstore();
if (lseek(fp, 0, SEEK_END) == 0) {
write(fp, buff, 4096);
}*/
-
+
+ allocblock(buff);
+
printf("Recognized commands:\n"
"Note: the LSB of a node number indicates if it is writable\n"
" root <node> set root to <node>\n"
" snapshot take a snapshot of the root\n"
" set <key> <val> set key=val\n"
" get <key> query key\n"
+ " c <proot> <croot> collapse\n"
+ " pr print tree to dot\n"
+ " pf <1=verbose> print freelist\n"
" quit\n"
"\nroot = %Ld\n", root);
for (;;) {
- print_root(root, 34, 0, NULL);
- system("dot radix.dot -Tps -o radix.ps");
+ //print_root(root, 34, NULL);
+ //system("dot radix.dot -Tps -o radix.ps");
printf("> ");
fflush(stdout);
} else if (sscanf(buff, " set %Ld %Ld", &key, &val) == 2) {
root = update(34, root, key, val);
printf("root = %Ld\n", root);
+ } else if (sscanf(buff, " c %Ld %Ld", &p, &c) == 2) {
+ v = collapse(34, p, c);
+ printf("reclaimed %d blocks.\n", v);
} else if (sscanf(buff, " get %Ld", &key) == 1) {
- val = lookup(34, root, key, NULL);
+ val = lookup(34, root, key);
printf("value = %Ld\n", val);
} else if (!strcmp(buff, "quit\n")) {
break;
root = snapshot(root);
printf("new root = %Ld\n", root);
} else if (sscanf(buff, " pr %Ld", &root) == 1) {
- print_root(root, 34, 0, NULL);
+ print_root(root, 34, NULL);
+ } else if (sscanf(buff, " pf %d", &v) == 1) {
+ freelist_count(v);
+ } else if (!strcmp(buff, "pf\n")) {
+ freelist_count(0);
} else {
printf("command not recognized\n");
}
u64 lookup(int height, u64 root, u64 key);
u64 update(int height, u64 root, u64 key, u64 val);
u64 snapshot(u64 root);
+int collapse(int height, u64 proot, u64 croot);
int isprivate(int height, u64 root, u64 key);
#endif /* __RADIX_H__ */
snap_id_t id = *old_id;
snap_block_t *blk = snap_get_block(id.block);
+ if ( rec->deleted == 1 ) {
+ printf("Attempt to append a deleted snapshot!\n");
+ return -1;
+ }
+
if ( blk->hdr.immutable != 0 ) {
printf("Attempt to snap an immutable snap block!\n");
return -1;
return 0;
}
+/*
+ * snap_collapse: delete the parent snapshot p_id by collapsing it onto its
+ * child snapshot c_id.
+ *
+ * Only consecutive snapshots on the same log block are handled for now.
+ * The parent record is marked deleted and written back, then collapse() is
+ * run on the two radix roots.
+ *
+ * @height: radix tree height, passed through to collapse().
+ * @p_id:   snapshot to delete.
+ * @c_id:   the snapshot taken directly after p_id.
+ *
+ * Returns whatever collapse() returns (the number of blocks reclaimed, or
+ * -1), and -1 if a snap block cannot be read or the snapshots are not
+ * consecutive.
+ */
+int snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id)
+{
+    snap_block_t *p_blk, *c_blk;
+    snap_rec_t   *p_rec, *c_rec;
+    int ret = -1;
+
+    p_blk = snap_get_block(p_id->block);
+
+    if (p_blk == NULL) return(-1);
+
+    if (c_id->block == p_id->block)
+    {
+        c_blk = p_blk;
+    } else {
+        c_blk = snap_get_block(c_id->block);
+
+        /* the child block is the one that may have failed to load here */
+        if (c_blk == NULL) {
+            freeblock(p_blk);
+            return(-1);
+        }
+    }
+
+    /* parent and child must not be deleted. */
+    p_rec = &p_blk->snaps[p_id->index];
+    c_rec = &c_blk->snaps[c_id->index];
+    /*
+    if ( (p_rec->deleted == 1) || (c_rec->deleted == 1) ) {
+        printf("One of those snaps is already deleted.\n");
+        goto done;
+    }
+    */
+    /* first non-deleted thing in the log before child must be parent. */
+
+    /* XXX todo: test the range here for delete (and eventually fork) bits) */
+    /* for now, snaps must be consecutive, on the same log page: */
+
+    /* index is unsigned: reject 0 explicitly so index-1 cannot wrap */
+    if ((p_id->block != c_id->block) || (c_id->index == 0) ||
+        (p_id->index != c_id->index-1))
+    {
+        printf("Deleting non-consecutive snaps is not done yet.\n");
+        goto done;
+    }
+
+    /* mark parent as deleted XXX: may need to lock parent block here.*/
+    p_rec->deleted = 1;
+    writeblock(p_id->block, p_blk);
+
+    /* delete the parent */
+    printf("collapse(%Ld, %Ld)\n", p_rec->radix_root, c_rec->radix_root);
+    ret = collapse(height, p_rec->radix_root, c_rec->radix_root);
+
+    /* return the number of blocks reclaimed. */
+
+done:
+    if (c_blk != p_blk) freeblock(c_blk);
+    freeblock(p_blk);
+
+    return(ret);
+}
+
void snap_print_history(snap_id_t *snap_id)
{
snap_id_t id = *snap_id;
* Snapshot log on-disk data structure.
*
*/
-
+
+#include "radix.h"
#include "blockstore.h" /* for BLOCK_SIZE */
+#ifndef __SNAPLOG_H__
+#define __SNAPLOG_H__
+
typedef struct snap_id {
u64 block;
unsigned int index;
typedef struct snap_rec {
u64 radix_root;
struct timeval timestamp;
+ /* flags: */
+ unsigned deleted:1;
} snap_rec_t;
int snap_block_create(snap_id_t *parent_id, snap_id_t *new_id);
int snap_append(snap_id_t *id, snap_rec_t *rec, snap_id_t *new_id);
+int snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id);
void snap_print_history(snap_id_t *snap_id);
int snap_get_id(snap_id_t *id, snap_rec_t *target);
snap_block_t *snap_get_block(u64 block);
+
+#endif /* __SNAPLOG_H__ */
#include "radix.h"
#include "vdi.h"
-#define VDI_REG_BLOCK 1LL
-#define VDI_RADIX_ROOT writable(2)
+#define VDI_REG_BLOCK 2LL
+#define VDI_RADIX_ROOT writable(3)
#if 1
#define DPRINTF(_f, _a...) printf ( _f , ## _a )
rec.radix_root = vdi->radix_root;
gettimeofday(&rec.timestamp, NULL);
+ rec.deleted = 0;
vdi->radix_root = snapshot(vdi->radix_root);
ret = snap_append(&vdi->snap, &rec, &vdi->snap);
--- /dev/null
+/**************************************************************************
+ *
+ * vdi_snap_delete.c
+ *
+ * Delete a snapshot.
+ *
+ * This is not finished: right now it takes a snap n and calls
+ * snap_collapse(n,n+1).
+ *
+ * TODO: support for non-consecutive, non-same-block snaps
+ * Avoid forking probs.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include "blockstore.h"
+#include "snaplog.h"
+#include "radix.h"
+#include "vdi.h"
+
+/*
+ * vdi_snap_delete <snap block> <snap idx>
+ *
+ * Deletes the snapshot identified by (block, idx) by collapsing it onto the
+ * snapshot at idx+1 of the same block.
+ *
+ * Exits with -1 on a usage error, returns 1 if snap_collapse() fails and 0
+ * after printing the number of freed blocks.
+ */
+int main(int argc, char *argv[])
+{
+    snap_id_t id, c_id;
+    char *end;
+    int ret;
+
+    /* validate the command line before touching the blockstore */
+    if ( argc != 3 ) {
+        printf("usage: %s <snap block> <snap idx>\n", argv[0]);
+        exit(-1);
+    }
+
+    id.block = (u64) strtoull(argv[1], &end, 10);
+    if ( (end == argv[1]) || (*end != '\0') ) {
+        printf("usage: %s <snap block> <snap idx>\n", argv[0]);
+        exit(-1);
+    }
+
+    id.index = (unsigned int) strtoul(argv[2], &end, 10);
+    if ( (end == argv[2]) || (*end != '\0') ) {
+        printf("usage: %s <snap block> <snap idx>\n", argv[0]);
+        exit(-1);
+    }
+
+    __init_blockstore();
+    __init_vdi();
+
+    /* the child is the next record on the same snap block */
+    c_id = id;
+    c_id.index++;
+
+    ret = snap_collapse(VDI_HEIGHT, &id, &c_id);
+
+    /* a negative result is an error, not a block count */
+    if ( ret < 0 ) {
+        printf("Failed to delete snapshot (%Ld, %u).\n", id.block, id.index);
+        return 1;
+    }
+
+    printf("Freed %d blocks.\n", ret);
+
+    return 0;
+}
sid = vdi->snap;
sid.index--;
- //printf("%8s%4s%21s %12s\n", "Block", "idx", "timestamp", "radix root");
- printf("%8s%4s%37s %12s\n", "Block", "idx", "timestamp", "radix root");
+ //printf("%8s%4s%21s %12s %1s\n", "Block", "idx", "timestamp",
+ // "radix root", "d");
+ printf("%8s%4s%37s %12s %1s\n", "Block", "idx", "timestamp",
+ "radix root", "d");
while (sid.block != 0) {
blk = snap_get_block(sid.block);
}
t = ctime(&blk->snaps[i].timestamp.tv_sec);
t[strlen(t)-1] = '\0';
- //printf("%8Ld%4u%14lu.%06lu %12Ld\n",
- printf("%8Ld%4u%30s %06lu %12Ld\n",
+ //printf("%8Ld%4u%14lu.%06lu %12Ld %1s\n",
+ printf("%8Ld%4u%30s %06lu %12Ld %1s\n",
sid.block, i,
//blk->snaps[i].timestamp.tv_sec,
t,
blk->snaps[i].timestamp.tv_usec,
- blk->snaps[i].radix_root);
+ blk->snaps[i].radix_root,
+ blk->snaps[i].deleted ? "*" : " ");
if ( max_snaps != -1 )
max_snaps--;
}